import pandas as pd
import numpy as np
import plotly
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
from scipy import integrate
from scipy.signal import butter, lfilter, sosfiltfilt, filtfilt
import warnings
warnings.filterwarnings("ignore")
custom_template = {
"layout": go.Layout(
font={
"family": "Arial",
"size": 14,
"color": "#707070",
},
title={
"font": {
"family": "Arial",
"size": 18,
"color": "#1f1f1f",
},
},
plot_bgcolor="#ffffff",
paper_bgcolor="#ffffff",
colorway=px.colors.qualitative.G10,
)
}
def format_title(title, subtitle=None, subtitle_font_size=14):
title = f'<b>{title}</b>'
if not subtitle:
return title
subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
return f'{title}<br>{subtitle}'
pre_path = "../data/preprocessed/measurement_windio_msb-0002-a_2021-10-21.csv"
df = pd.read_csv(pre_path, parse_dates=["date_time"], index_col="date_time")
df.head()
| uptime | acc_x | acc_y | acc_z | rot_x | rot_y | rot_z | mag_x | mag_y | mag_z | temp | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| date_time | |||||||||||
| 2021-10-21 09:42:23.724786432 | 54005.42 | -0.028870 | -0.024475 | 0.978333 | 0.931298 | 0.702290 | -0.854962 | -292.0 | 39.0 | -271.0 | 2896.0 |
| 2021-10-21 09:42:23.744237312 | 54005.44 | -0.030334 | -0.023865 | 0.981567 | 0.992366 | 0.732824 | -0.702290 | -303.0 | 40.0 | -272.0 | 2880.0 |
| 2021-10-21 09:42:23.763693056 | 54005.46 | -0.029724 | -0.020996 | 0.981995 | 0.961832 | 0.824427 | -0.839695 | -289.0 | 42.0 | -276.0 | 2848.0 |
| 2021-10-21 09:42:23.783293696 | 54005.48 | -0.027405 | -0.025330 | 0.975708 | 0.809160 | 0.748092 | -0.809160 | -307.0 | 29.0 | -281.0 | 2880.0 |
| 2021-10-21 09:42:23.802685440 | 54005.50 | -0.027527 | -0.021606 | 0.979919 | 0.977099 | 0.610687 | -0.854962 | -289.0 | 39.0 | -269.0 | 2944.0 |
df.drop(['uptime','acc_z', 'rot_x', 'rot_y', 'rot_z', 'mag_x', 'mag_y', 'mag_z', 'temp'], axis=1, inplace=True)
df.dropna(inplace = True)
df.head()
| acc_x | acc_y | |
|---|---|---|
| date_time | ||
| 2021-10-21 09:42:23.724786432 | -0.028870 | -0.024475 |
| 2021-10-21 09:42:23.744237312 | -0.030334 | -0.023865 |
| 2021-10-21 09:42:23.763693056 | -0.029724 | -0.020996 |
| 2021-10-21 09:42:23.783293696 | -0.027405 | -0.025330 |
| 2021-10-21 09:42:23.802685440 | -0.027527 | -0.021606 |
# Convert acceleration unit from g to m/s^2
df["acc_x"] = df["acc_x"]*9.81
df["acc_y"] = df["acc_y"]*9.81
# Dataframe with same freq. as timeseries
t = df.index.astype('int64')/1e9
# Deinition of the used highpass-filter
def butter_highpass_sosfiltfilt(data, lowcut=0.1, fs=50, pad='even', padlen=500, order=9):
"""
applies a symmetric filter (no phase offset)
"""
if pad not in ('even', 'odd', 'constant', None):
raise Exception('please provide a valid padding')
# check if len(data > padlen)
if len(data) < padlen:
print(
f'padlen exceeds the number of available data points. Setting padlen to {len(data)}')
padlen = len(data) - 1
nyq = fs*0.5
low = lowcut/nyq
sos = butter(order, low, btype='highpass', output='sos')
y = sosfiltfilt(sos, data, padtype=pad, padlen=padlen)
return y
# Apply highpass-filter for acceleration-data
for c in ['acc_x', 'acc_y']:
df[c] = butter_highpass_sosfiltfilt(df[c])
# Calculate the velocity-data
for component in ['x', 'y']:
df.insert(loc=len(df.columns),
value=integrate.cumtrapz(
df[f'acc_{component}'], t, initial=0),
column=f'vel_{component}',)
# Apply highpass-filter for velocity-data
for c in ['vel_x', 'vel_y']:
df[c] = butter_highpass_sosfiltfilt(df[c])
# Calculate the position-data
for component in ['x', 'y']:
df.insert(loc=len(df.columns),
value=integrate.cumtrapz(
df[f'vel_{component}'], t, initial=0),
column=f'pos_{component}',)
# Apply highpass-filter for position-data
for c in ['pos_x', 'pos_y']:
df[c] = butter_highpass_sosfiltfilt(df[c])
# Rename columns
df.rename(columns={'acc_x': 'acc_x ($m/s^{2}$)','acc_y': 'acc_y ($m/s^{2}$)',
'vel_x': 'vel_x ($m/s$)', 'vel_y': 'vel_y ($m/s$)',
'pos_x': 'pos_x ($m$)', 'pos_y': 'pos_y ($m$)'
}, inplace=True, errors='raise')
df.head()
| acc_x ($m/s^{2}$) | acc_y ($m/s^{2}$) | vel_x ($m/s$) | vel_y ($m/s$) | pos_x ($m$) | pos_y ($m$) | |
|---|---|---|---|---|---|---|
| date_time | ||||||
| 2021-10-21 09:42:23.724786432 | 0.042270 | -0.030006 | -0.033023 | -0.010705 | 0.082647 | 0.004730 |
| 2021-10-21 09:42:23.744237312 | 0.027894 | -0.024017 | -0.032277 | -0.011219 | 0.081668 | 0.004484 |
| 2021-10-21 09:42:23.763693056 | 0.033878 | 0.004126 | -0.031607 | -0.011400 | 0.080689 | 0.004231 |
| 2021-10-21 09:42:23.783293696 | 0.056627 | -0.038384 | -0.030645 | -0.011723 | 0.079708 | 0.003970 |
| 2021-10-21 09:42:23.802685440 | 0.055428 | -0.001858 | -0.029479 | -0.012099 | 0.078740 | 0.003704 |
# Select subset of the origin time-series
df2 = df.iloc[5000:30000,:]
fig = px.line(df2, x='pos_x ($m$)', y='pos_y ($m$)', title=format_title("Windturbinenkinematik aus der Draufsicht."),
labels={
'pos_x ($m$)': "Position X (m)",
'pos_y ($m$)': "Position Y (m)"
},width=800, height=800, template=custom_template)
fig.show()
fig = px.line(df2, y='pos_x ($m$)', title=format_title("Position in X Dimension."),
labels={
'date_time': "Zeit (mm:ss)",
'pos_x ($m$)': "Position X (m)"
}, width=1200, height=400, template=custom_template)
fig.show()
fig = px.line(df2, y='pos_y ($m$)', title=format_title("Position in Y Dimension."),
labels={
'date_time': "Zeit (mm:ss)",
'pos_y ($m$)': "Position Y (m)"
}, width=1200, height=400, template=custom_template)
fig.show()
df = df.resample("S").mean()
df.dropna(inplace = True)
train = df.iloc[:-(round(0.02*len(df)))]
test=df.iloc[-(round(0.02*len(df))):]
model = ARIMA(train['pos_x ($m$)'], order=(6,0,6))
model = model.fit()
pred=model.predict(start=len(train), end=len(train)+len(test)-1, typ="levels")
pred.index=df.index[len(train):len(train)+len(test)]
test["pred ($m$)"] = pred
rmse = sqrt(mean_squared_error(test["pred ($m$)"], test['pos_x ($m$)']))
fig = px.line(test, y=['pos_x ($m$)', "pred ($m$)"], title=format_title("Position in X Dimension.", "RMSE: " + str(round(rmse, 7))),
labels={
'date_time': "Zeit (hh:mm:ss)",
'value': "Position X (m)",
'variable': ""
}, width=1200, height=400, template=custom_template)
newnames = {"pos_x ($m$)":'Testdaten', "pred ($m$)": 'ARIMA Prädiktion'}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
legendgroup = newnames[t.name],
hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])))
fig.show()
model = ARIMA(train['pos_y ($m$)'], order=(6,0,3))
model = model.fit()
pred=model.predict(start=len(train), end=len(train)+len(test)-1, typ="levels")
pred.index=df.index[len(train):len(train)+len(test)]
test["pred ($m$)"] = pred
rmse = sqrt(mean_squared_error(test["pred ($m$)"], test['pos_y ($m$)']))
fig = px.line(test, y=['pos_y ($m$)', "pred ($m$)"], title=format_title("Position in Y Dimension.", "RMSE: " + str(round(rmse, 7))),
labels={
'date_time': "Zeit (hh:mm:ss)",
'value': "Position Y (m)",
'variable': ""
}, width=1200, height=400, template=custom_template)
newnames = {"pos_y ($m$)":'Testdaten', "pred ($m$)": 'ARIMA Prädiktion'}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
legendgroup = newnames[t.name],
hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])))
fig.show()
df2 = pd.DataFrame(data={'date_time': [0], 'rmse_arima': [0], 'rmse_sarima': [0], 'rmse_prophet': [0]})
for i in range(1,15,1):
train = df.iloc[:-(round(0.013*i*len(df)))]
test=df.iloc[-(round(0.013*i*len(df))):]
model = ARIMA(train['pos_x ($m$)'], order=(6,0,6))
model = model.fit()
pred=model.predict(start=len(train), end=len(train)+len(test)-1, typ="levels")
pred.index=df.index[len(train):len(train)+len(test)]
rmse = sqrt(mean_squared_error(pred, test['pos_x ($m$)']))
df2 = df2.append({'date_time': len(test), 'rmse_arima': rmse}, ignore_index=True)
df2["std"] = test['pos_x ($m$)'].std()
fig = px.line(df2, x = "date_time", y=["rmse_arima","rmse_sarima", "rmse_prophet", "std"], title=format_title("RMSE für Position in X Dimension."),
labels={
'date_time': "Zeit (s)",
'value': "",
'variable': ""
}, width=1200, height=400, template=custom_template)
newnames = {"std":'STD', "rmse_arima": 'RMSE ARIMA', "rmse_sarima": 'RMSE SARIMA', "rmse_prophet": 'RMSE Prophet'}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
legendgroup = newnames[t.name],
hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])))
fig.show()
df2 = pd.DataFrame(data={'date_time': [0], 'rmse_arima': [0], 'rmse_sarima': [0], 'rmse_prophet': [0]})
for i in range(1,15,1):
train = df.iloc[:-(round(0.013*i*len(df)))]
test=df.iloc[-(round(0.013*i*len(df))):]
model = ARIMA(train['pos_y ($m$)'], order=(6,0,3))
model = model.fit()
pred=model.predict(start=len(train), end=len(train)+len(test)-1, typ="levels")
pred.index=df.index[len(train):len(train)+len(test)]
rmse = sqrt(mean_squared_error(pred, test['pos_y ($m$)']))
df2 = df2.append({'date_time': len(test), 'rmse_arima': rmse}, ignore_index=True)
df2["std"] = test['pos_y ($m$)'].std()
fig = px.line(df2, x = "date_time", y=["rmse_arima","rmse_sarima", "rmse_prophet", "std"], title=format_title("RMSE für Position in X Dimension."),
labels={
'date_time': "Zeit (s)",
'value': "",
'variable': ""
}, width=1200, height=400, template=custom_template)
newnames = {"std":'STD', "rmse_arima": 'RMSE ARIMA', "rmse_sarima": 'RMSE SARIMA', "rmse_prophet": 'RMSE Prophet'}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
legendgroup = newnames[t.name],
hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])))
fig.show()